import pandas as pd

# Load the sample dataset; the path is resolved relative to the current
# working directory.
df = pd.read_csv('data/learn_pandas.csv')
# Column names
print('列名\n', df.columns)

# First seven columns, selected by their labels
print("前七列\n", df[df.columns[:7]])

# First two rows
print('前两行数据\n', df.head(2))
# Last three rows
print('后三行数据\n', df.tail(3))

# Structural overview of the frame.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called as a statement after the caption; passing its return value to
# print() would emit the report before the caption followed by "None".
print('查看信息')
df.info()
# Descriptive statistics as computed by DataFrame.describe()
print('describe\n', df.describe())

# Work on the Height and Weight columns only.
df_demo = df[['Height', 'Weight']]

# Per-column aggregates: each call yields one value per column.
column_means = df_demo.mean()
print('平均值\n', column_means)
column_maxima = df_demo.max()
print('最大值\n', column_maxima)

lower_quartile = df_demo.quantile(q=0.25)
print('分位数\n', lower_quartile)
non_missing_counts = df_demo.count()
print('非缺失值个数\n', non_missing_counts)
labels_of_maxima = df_demo.idxmax()
print('最大值对应的索引\n', labels_of_maxima)

# Aggregations work column-wise by default; axis=1 aggregates across each
# row instead. Only the first rows of the result are shown.
row_means = df_demo.mean(axis=1)
print('对行来进行聚合\n', row_means.head())

# All three summaries below describe the same column, so select it once.
school = df['School']

# Array of the distinct values
distinct_schools = school.unique()
print("唯一值的列表\n", distinct_schools)
# Number of distinct values
distinct_school_count = school.nunique()
print("唯一值的个数\n", distinct_school_count)

# Each distinct value together with how often it occurs
school_frequencies = school.value_counts()
print('唯一值和其出现的频数\n', school_frequencies)

df_demo = df[['Gender', 'Transfer', 'Name']]

# Columns whose combined value decides whether two rows count as duplicates.
gender_transfer = ['Gender', 'Transfer']

# Keep the first row seen for each (Gender, Transfer) combination.
first_occurrences = df_demo.drop_duplicates(subset=gender_transfer)
print('两行重复输出第一次出现的值\n', first_occurrences)
# Keep the last row seen for each (Gender, Transfer) combination.
last_occurrences = df_demo.drop_duplicates(subset=gender_transfer, keep='last')
print('两行重复输出最后一次出现的值\n', last_occurrences)
# keep=False drops every row whose (Name, Gender) pair occurs more than once,
# leaving only the pairs that appear exactly once.
unrepeated_rows = df_demo.drop_duplicates(subset=['Name', 'Gender'], keep=False)
print('保留只出现过一次的数据\n', unrepeated_rows.head())

# Boolean flag per row: True when an earlier row has the same combination.
repeat_flags = df_demo.duplicated(subset=gender_transfer)
print('展示是否为重复行\n', repeat_flags.head())

# Build a frame indexed by (Grade, Name) holding only Height and Weight.
df_demo = (
    df[['Grade', 'Name', 'Height', 'Weight']]
    .set_index(['Grade', 'Name'])
)
print(df_demo)

# Sort by Height, ascending (the default) and then descending.
height_ascending = df_demo.sort_values(by='Height')
print('身高升序排序\n', height_ascending.head())
height_descending = df_demo.sort_values(by='Height', ascending=False)
print('身高降序排序\n', height_descending.head())

# Multi-key sort: Weight ascending is the primary key, Height descending
# orders rows that share the same Weight.
weight_then_height = df_demo.sort_values(
    by=['Weight', 'Height'],
    ascending=[True, False],
)
print('身高降序，体重升序\n', weight_then_height.head())

# Sort on the index levels: Grade ascending, then Name descending.
index_sorted = df_demo.sort_index(
    level=['Grade', 'Name'],
    ascending=[True, False],
)
print('索引排序\n', index_sorted.head())
